# -*- coding: utf-8 -*-
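"""Hourly DAG task for building the dwd_wide_rank_basic_scaninfo_tms_hi wide table.

A customized SparkSubmitOperator submits the YlBarscanToTraceHour job each hour,
skips the run whose shifted CST schedule hour is 00, and depends on the
dwd_waybill_change_base_hi task finishing first.
"""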
import pendulum
from airflow.exceptions import AirflowSkipException
from datetime import timedelta
from ai_ageing_cusc_hi.dwd.dwd_warhouse.dwd_waybill_change_base_hi import jms_dwd__dwd_waybill_change_base_hi
from utils.operators.spark_submit import SparkSubmitOperator

cst = pendulum.timezone('Asia/Shanghai')
class aiWideSparkSubmitOperator(SparkSubmitOperator):
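    """SparkSubmitOperator that skips runs whose shifted CST schedule hour is 00.

    The execution date is converted to Asia/Shanghai and shifted forward by
    6 hours; if the resulting hour is in the skip list, the task raises
    AirflowSkipException instead of submitting the Spark job.
    """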

    def pre_execute(self, context):
        # Shift the CST execution time forward by 6 hours to get the business hour
        # this run corresponds to.
        day = cst.convert(context['ti'].execution_date) + timedelta(hours=6)

        # Hours (after the +6h shift) for which this task should be skipped.
        skip_hours = ['00']

        if day.strftime('%H') in skip_hours:
            self.log.info(f'{day.strftime("%H")} in {skip_hours}, should skip')
            super().pre_execute(context)
            raise AirflowSkipException()
        else:
            self.log.info(f'{day.strftime("%H")} not in {skip_hours}, run now')
            super().pre_execute(context)

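# Hourly Spark submission; the YARN application name embeds the CST hour of the
# schedule, and the resources below request 100 executors with 10 cores / 20G each.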
jms_dwd__dwd_wide_rank_basic_scaninfo_tms_hi = aiWideSparkSubmitOperator(
    task_id='jms_dwd__dwd_wide_rank_basic_scaninfo_tms_hi',
    pool_slots=9,
    task_concurrency=1,  # set task_concurrency to 1 if the task does not support concurrent runs
    name='jms_dwd__dwd_wide_rank_basic_scaninfo_tms_hi_{{ execution_date | hour_add(1) | cst_hour }}',  # YARN application name
    driver_memory='38G',
    executor_memory='20G',
    executor_cores=10,
    retries=0,
    num_executors=100,
    email=['rongguangfan@jtexpress.com', 'yl_bigdata@yl-scm.com'],
    conf={'spark.executor.memoryOverhead': 3072,
          'spark.core.connection.ack.wait.timeout': 3000,
          'spark.default.parallelism': 3600,
          'spark.sql.shuffle.partitions': 3600,
          'spark.locality.wait': 60,
          'spark.network.timeout': 300,
          'spark.driver.maxResultSize': '20g',  # bare 20 is parsed as 20 bytes; 20g matches the 38G driver
          'spark.driver.cores': 14,
          'spark.yarn.queue': 'default',
          'spark.shuffle.consolidateFiles': 'true',
          #'spark.sql.parquet.compression.codec': 'gzip'
          'spark.shuffle.memoryFraction': '0.8',
          'spark.shuffle.file.buffer': '64k',
          'spark.executor.extraJavaOptions': '-XX:+UseG1GC -XX:ParallelGCThreads=10 -XX:ConcGCThreads=2'
          #'spark.sql.autoBroadcastJoinThreshold': 3221225472
          },
    java_class='com.yunlu.bigdata.jobs.udaf.YlBarscanToTraceHour',  # Spark main class
    application='hdfs:///scheduler/ai_ageing_cusc_hi/spark/dwd_warhouse/dwd_wide_rank_basic_scaninfo_tms_hi/jobs-1.0-SNAPSHOT.jar',
    # Spark job jar on HDFS
    application_args=['{{ execution_date | cst_ds }}', '39', '30'],
    # args: dt, e.g. 2020-10-26
    execution_timeout=timedelta(hours=2),
)

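# Upstream dependency: this task runs only after dwd_waybill_change_base_hi completes.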
jms_dwd__dwd_wide_rank_basic_scaninfo_tms_hi << [
    jms_dwd__dwd_waybill_change_base_hi
]